{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 读取数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = pd.read_csv('./data/train_set.csv')#训练集\n",
    "\n",
    "test = pd.read_csv('./data/test_set.csv')#测试集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>article</th>\n",
       "      <th>word_seg</th>\n",
       "      <th>class</th>\n",
       "      <th>label</th>\n",
       "      <th>class_trans</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>7368 1252069 365865 755561 1044285 129532 1053...</td>\n",
       "      <td>816903 597526 520477 1179558 1033823 758724 63...</td>\n",
       "      <td>14</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>581131 165432 7368 957317 1197553 570900 33659...</td>\n",
       "      <td>90540 816903 441039 816903 569138 816903 10343...</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>7368 87936 40494 490286 856005 641588 145611 1...</td>\n",
       "      <td>816903 1012629 957974 1033823 328210 947200 65...</td>\n",
       "      <td>12</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>299237 760651 299237 887082 159592 556634 7489...</td>\n",
       "      <td>563568 1239563 680125 780219 782805 1033823 19...</td>\n",
       "      <td>13</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>7368 7368 7368 865510 7368 396966 995243 37685...</td>\n",
       "      <td>816903 816903 816903 139132 816903 312320 1103...</td>\n",
       "      <td>12</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id                                            article  \\\n",
       "0   0  7368 1252069 365865 755561 1044285 129532 1053...   \n",
       "1   1  581131 165432 7368 957317 1197553 570900 33659...   \n",
       "2   2  7368 87936 40494 490286 856005 641588 145611 1...   \n",
       "3   3  299237 760651 299237 887082 159592 556634 7489...   \n",
       "4   4  7368 7368 7368 865510 7368 396966 995243 37685...   \n",
       "\n",
       "                                            word_seg  class  label  \\\n",
       "0  816903 597526 520477 1179558 1033823 758724 63...     14     13   \n",
       "1  90540 816903 441039 816903 569138 816903 10343...      3      2   \n",
       "2  816903 1012629 957974 1033823 328210 947200 65...     12     11   \n",
       "3  563568 1239563 680125 780219 782805 1033823 19...     13     12   \n",
       "4  816903 816903 816903 139132 816903 312320 1103...     12     11   \n",
       "\n",
       "   class_trans  \n",
       "0           14  \n",
       "1            3  \n",
       "2           12  \n",
       "3           13  \n",
       "4           12  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# TFIDF构建文本特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_vec = TfidfVectorizer(analyzer='word',\n",
    "            ngram_range=(1,2),#(1,3)\n",
    "            min_df=3,  # 4  5\n",
    "            max_df=0.9, # 0.95 1.0 \n",
    "            use_idf=True,\n",
    "            smooth_idf=True, \n",
    "            sublinear_tf=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_term_doc = word_vec.fit_transform(train['word_seg'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_term_doc = word_vec.transform(test['word_seg'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<102277x2820641 sparse matrix of type '<class 'numpy.float64'>'\n",
       "\twith 75739932 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_term_doc#sparse matrix 102277x2820641  np.array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<102277x2820641 sparse matrix of type '<class 'numpy.float64'>'\n",
       "\twith 73582169 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_term_doc  #102277x2820641 稀疏矩阵可以直接在逻辑回归里面调用"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 逻辑回归构建模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "[1,2,3,4,5,6,7...,19]  -> [0,1,2,...18]\n",
    "[\"a\",'b',...]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import LabelEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "lb = LabelEncoder()\n",
    "train['label'] = lb.fit_transform(train['class'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([13,  2, 11, ..., 11,  3, 10])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lb.fit_transform(train['class'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>article</th>\n",
       "      <th>word_seg</th>\n",
       "      <th>class</th>\n",
       "      <th>label</th>\n",
       "      <th>class_trans</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>7368 1252069 365865 755561 1044285 129532 1053...</td>\n",
       "      <td>816903 597526 520477 1179558 1033823 758724 63...</td>\n",
       "      <td>14</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>581131 165432 7368 957317 1197553 570900 33659...</td>\n",
       "      <td>90540 816903 441039 816903 569138 816903 10343...</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>7368 87936 40494 490286 856005 641588 145611 1...</td>\n",
       "      <td>816903 1012629 957974 1033823 328210 947200 65...</td>\n",
       "      <td>12</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>299237 760651 299237 887082 159592 556634 7489...</td>\n",
       "      <td>563568 1239563 680125 780219 782805 1033823 19...</td>\n",
       "      <td>13</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>7368 7368 7368 865510 7368 396966 995243 37685...</td>\n",
       "      <td>816903 816903 816903 139132 816903 312320 1103...</td>\n",
       "      <td>12</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id                                            article  \\\n",
       "0   0  7368 1252069 365865 755561 1044285 129532 1053...   \n",
       "1   1  581131 165432 7368 957317 1197553 570900 33659...   \n",
       "2   2  7368 87936 40494 490286 856005 641588 145611 1...   \n",
       "3   3  299237 760651 299237 887082 159592 556634 7489...   \n",
       "4   4  7368 7368 7368 865510 7368 396966 995243 37685...   \n",
       "\n",
       "                                            word_seg  class  label  \\\n",
       "0  816903 597526 520477 1179558 1033823 758724 63...     14     13   \n",
       "1  90540 816903 441039 816903 569138 816903 10343...      3      2   \n",
       "2  816903 1012629 957974 1033823 328210 947200 65...     12     11   \n",
       "3  563568 1239563 680125 780219 782805 1033823 19...     13     12   \n",
       "4  816903 816903 816903 139132 816903 312320 1103...     12     11   \n",
       "\n",
       "   class_trans  \n",
       "0           14  \n",
       "1            3  \n",
       "2           12  \n",
       "3           13  \n",
       "4           12  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "train['class_trans'] = lb.inverse_transform(train['label'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>article</th>\n",
       "      <th>word_seg</th>\n",
       "      <th>class</th>\n",
       "      <th>label</th>\n",
       "      <th>class_trans</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>7368 1252069 365865 755561 1044285 129532 1053...</td>\n",
       "      <td>816903 597526 520477 1179558 1033823 758724 63...</td>\n",
       "      <td>14</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>581131 165432 7368 957317 1197553 570900 33659...</td>\n",
       "      <td>90540 816903 441039 816903 569138 816903 10343...</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>7368 87936 40494 490286 856005 641588 145611 1...</td>\n",
       "      <td>816903 1012629 957974 1033823 328210 947200 65...</td>\n",
       "      <td>12</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>299237 760651 299237 887082 159592 556634 7489...</td>\n",
       "      <td>563568 1239563 680125 780219 782805 1033823 19...</td>\n",
       "      <td>13</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>7368 7368 7368 865510 7368 396966 995243 37685...</td>\n",
       "      <td>816903 816903 816903 139132 816903 312320 1103...</td>\n",
       "      <td>12</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id                                            article  \\\n",
       "0   0  7368 1252069 365865 755561 1044285 129532 1053...   \n",
       "1   1  581131 165432 7368 957317 1197553 570900 33659...   \n",
       "2   2  7368 87936 40494 490286 856005 641588 145611 1...   \n",
       "3   3  299237 760651 299237 887082 159592 556634 7489...   \n",
       "4   4  7368 7368 7368 865510 7368 396966 995243 37685...   \n",
       "\n",
       "                                            word_seg  class  label  \\\n",
       "0  816903 597526 520477 1179558 1033823 758724 63...     14     13   \n",
       "1  90540 816903 441039 816903 569138 816903 10343...      3      2   \n",
       "2  816903 1012629 957974 1033823 328210 947200 65...     12     11   \n",
       "3  563568 1239563 680125 780219 782805 1033823 19...     13     12   \n",
       "4  816903 816903 816903 139132 816903 312320 1103...     12     11   \n",
       "\n",
       "   class_trans  \n",
       "0           14  \n",
       "1            3  \n",
       "2           12  \n",
       "3           13  \n",
       "4           12  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(102277, 6)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train[train['class'] == train['class_trans']].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/hcq/miniconda3/envs/python3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/home/hcq/miniconda3/envs/python3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
      "  \"this warning.\", FutureWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=4, class_weight=None, dual=True, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=100, multi_class='warn',\n",
       "          n_jobs=None, penalty='l2', random_state=None, solver='warn',\n",
       "          tol=0.0001, verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "clf = LogisticRegression(C=4, dual=True) # c 1.0  2 3 4 5  200w 10w\n",
    "clf.fit(train_term_doc,train['label'])#[0,18]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_prob = clf.predict_proba(test_term_doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.00959619, 0.00508457, 0.00357474, 0.01217717, 0.86841463,\n",
       "       0.00547256, 0.00854222, 0.0035889 , 0.00909855, 0.00317351,\n",
       "       0.0074878 , 0.00535788, 0.0062232 , 0.02057748, 0.01153442,\n",
       "       0.00162819, 0.00505501, 0.00347785, 0.00993513])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_prob[0]# 4 +1 =5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 结果提交"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_pred = np.argmax(test_prob,axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 4,  3, 12, ...,  5, 13, 12])"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "test['class'] = lb.inverse_transform(test_pred)#[1,...19]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>article</th>\n",
       "      <th>word_seg</th>\n",
       "      <th>class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>7368 146447 316564 42610 55736 297797 93042 53...</td>\n",
       "      <td>816903 565958 726082 764656 335008 75094 20282...</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>985531 473628 1044285 1121849 206763 462208 11...</td>\n",
       "      <td>729468 520477 529032 101368 335130 520477 1113...</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>7368 7368 7368 7368 7368 7368 7368 7368 7368 7...</td>\n",
       "      <td>816903 816903 816903 816903 816903 816903 8169...</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>529819 1226459 856005 1177293 663773 272235 93...</td>\n",
       "      <td>231664 1033823 524850 330478 507199 520477 618...</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>42610 1252069 1077049 955883 1125260 1044285 2...</td>\n",
       "      <td>545370 379223 162767 520477 1194630 1197475 11...</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id                                            article  \\\n",
       "0   0  7368 146447 316564 42610 55736 297797 93042 53...   \n",
       "1   1  985531 473628 1044285 1121849 206763 462208 11...   \n",
       "2   2  7368 7368 7368 7368 7368 7368 7368 7368 7368 7...   \n",
       "3   3  529819 1226459 856005 1177293 663773 272235 93...   \n",
       "4   4  42610 1252069 1077049 955883 1125260 1044285 2...   \n",
       "\n",
       "                                            word_seg  class  \n",
       "0  816903 565958 726082 764656 335008 75094 20282...      5  \n",
       "1  729468 520477 529032 101368 335130 520477 1113...      4  \n",
       "2  816903 816903 816903 816903 816903 816903 8169...     13  \n",
       "3  231664 1033823 524850 330478 507199 520477 618...      4  \n",
       "4  545370 379223 162767 520477 1194630 1197475 11...      5  "
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "test[[\"id\",\"class\"]].to_csv(\"submission_baseline.csv\",index=False,header=True,encoding='utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
